---
title: "07 Mixed Effects Model - Antenatal"
author: "Ye Shen, Radhika Tampi, Raj Vatsa"
date: "11/29/2021"
output: pdf_document
---

```{r, results = 'hide', message = FALSE}
knitr::opts_chunk$set("warning"=FALSE)
library(tidyverse)
library(survey)
library(WeightIt)
library(survey)
library(questionr) # use survey design in ggplot
library(patchwork)
library(cobalt)
library(gt)
library(lme4)
library(arm)
library(lmerTest)
library(optimx)
library(gridExtra)
```

## Read and Subset Cleaned Data

```{r}
antenatal_did<-readRDS("data/cleaned_antenatal_did.rds")

# Survey design of data set to account for sample weights
svy_antenatal_did <- svydesign(
  id = ~cluster, strata = ~group, weights = ~wmweight,
  data = antenatal_did
)

# Subsets of data from 2014 and 2016 surveys
svy_antenatal_2014<-subset(svy_antenatal_did, year==0)
svy_antenatal_2016<-subset(svy_antenatal_did, year==1)
```

### DiD regression with village-level random effects, adjusting for wealth index score
  * Outcome: Any antenatal consultation
  
```{r}
# Weighted linear mixed effects model on antenatal care seeking among pregnant women
# Adjusted for household wealth index score
did_antenatal_mixed <- lmer(antenatal.publichealthcenter.consultation ~ year * group + 
                              menage_score_bien_etre + 
                              (1 + year + group:year | cluster),
                           data = antenatal_did,
                           weights = wmweight,
                           control = lmerControl(optimizer = "optimx",
                                                 optCtrl = list(method = "L-BFGS-B")))
summary(did_antenatal_mixed)

# Unadjusted and unweighted DID regression on antenatal care seeking among pregnant women
did_antenatal_unwtd_unadj <- svyglm(antenatal.publichealthcenter.consultation ~ year*group, 
                                   design = svy_antenatal_did, 
                                   family = gaussian)
summary(did_antenatal_unwtd_unadj)

# Adjusted and unweighted DID regression on antenatal care seeking among pregnant women
did_antenatal_unwtd_adj <- svyglm(antenatal.publichealthcenter.consultation ~ year*group +
                                    menage_score_bien_etre, 
                                   design = svy_antenatal_did, 
                                   family = gaussian)
summary(did_antenatal_unwtd_adj)
```

## Table 3: Pregnant Women
 * Outcome: Any antenatal consultation

### Prepare Table 3
 

```{r}
table3_antenatal <- 
  as.data.frame(array(data = NA, dim = c(2, 22)))
# "Indicator","Study group", "Denominator", "Numerator", "Percent (SE)", "Difference 
# (p-value)","Denominator", "Numerator", "Percent (SE)", "Difference (p-value)","Percent 
# relative change", "Percent absolute change", "Difference in differences (p-value)"
colnames(table3_antenatal) <- 
  c("indicator", "group", "d_2014", "n_2014", "m_2014", "se_2014", "dif_2014", "p-val_2014", 
    "d_2016", "n_2016", "m_2016", "se_2016", "dif_2016", "p-val_2016", "trends_rel", 
    "trends_abs", "dif.in.dif(coef)_unadj_unwt", "p-val_did_unadj_unwt",
    "dif.in.dif(coef)_adj_unwt", "p-val_did_adj_unwt",
    "dif.in.dif(coef)_mixed", "p-val_did_mixed")
table3_antenatal$indicator <- "antenatal.publichealthcenter.consultation"

# Any visit (Access)
## Summary stats for 2014
table3_antenatal[rev(c(1,2)), 2] <- c(0,1)
table3_antenatal[rev(c(1,2)), 3] <- 
    round(svytable( ~group, design = svy_antenatal_2014)) # 'd_2014'
table3_antenatal[rev(c(1,2)), 4] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_2014, 
                     FUN = svytotal, keep.var = T, na.rm = T))) # 'n_2014'
table3_antenatal[rev(c(1,2)), 5] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_2014, 
                     FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'm_2014'
table3_antenatal[rev(c(1,2)), 6] <- 
    round(SE(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_2014, 
                   FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2014'
table3_antenatal[rev(c(1,2)), 7] <- table3_antenatal[1, 5] - table3_antenatal[2, 5]# 'dif_2014'

### Estimate p-value of the 2014 difference with a Chi2 test
  if (table3_antenatal[1, 4] == 0 & 
      table3_antenatal[2, 4] == 0) {
    table3_antenatal[2, 8] <- NA
  } else {
    table3_antenatal[2, 8] <- 
      round(svychisq(~antenatal.publichealthcenter.consultation+group, design = svy_antenatal_2014)$p.value,2)
  }
## Summary stats for 2016

table3_antenatal[rev(c(1,2)), 9] <- 
    round(svytable( ~group, design = svy_antenatal_2016)) # 'd_2016'
table3_antenatal[rev(c(1,2)), 10] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_2016, 
                     FUN = svytotal,keep.var = T, na.rm = T))) # 'n_2016'
table3_antenatal[rev(c(1,2)), 11] <- 
    round(coef(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_2016, 
                     FUN = svymean, keep.var = T, na.rm = T))*100,2) # 'm_2016'
table3_antenatal[rev(c(1,2)), 12] <- 
    round(SE(svyby(~antenatal.publichealthcenter.consultation, by = ~group, design = svy_antenatal_2016, 
                   FUN = svymean,keep.var = T, na.rm = T))*100,2) # 'se_2016'
table3_antenatal[rev(c(1,2)), 13] <- table3_antenatal[1, 11] - table3_antenatal[2, 11]# 'dif_2016'

### Estimate p-value of the 2016 difference with a Chi2 test
  if (table3_antenatal[1, 10] == 0 & 
      table3_antenatal[2, 10] == 0) {
    table3_antenatal[2, 14] <- NA
  } else {
    table3_antenatal[2, 14] <- 
      round(svychisq(~antenatal.publichealthcenter.consultation+group, design = svy_antenatal_2016)$p.value,2)
  }

## Estimate values for trends 2014-2016
  table3_antenatal[rev(c(1, 2)), 15] <- 
    round((table3_antenatal[rev(c(1, 2)), 11] - 
       table3_antenatal[rev(c(1, 2)), 5]) / 
    table3_antenatal[rev(c(1, 2)), 5]*100,2) # 'trends_rel'
  table3_antenatal[rev(c(1, 2)), 16] <- 
    table3_antenatal[rev(c(1, 2)), 11] - 
    table3_antenatal[rev(c(1, 2)), 5] # 'trends_abs'

## DiD stats 
  # Unadjusted DiD
    #coef
  table3_antenatal[2, 17] <- 
    round(summary(did_antenatal_unwtd_unadj)$coefficients[4, 1]*100,2)
    #P
  table3_antenatal[2, 18] <- 
    round(summary(did_antenatal_unwtd_unadj)$coefficients[4, 4],2)
  # Adjusted DiD
    #coef
  table3_antenatal[2, 19] <- 
    round(summary(did_antenatal_unwtd_adj)$coefficients[5, 1]*100,2)
    #P
  table3_antenatal[2, 20] <- 
    round(summary(did_antenatal_unwtd_adj)$coefficients[5, 4],2)
  # Mixed effects DiD: adjusted and weighted
    #coef
  table3_antenatal[2, 21] <- round(summary(did_antenatal_mixed)$coef[5,1] * 100, 2)
    #P
  table3_antenatal[2, 22] <- round(summary(did_antenatal_mixed)$coef[5,5], 2)
```

### Final Table 3 Antenatal
```{r}
# Replicate final Table 3
final_table3_antenatal<-data.frame(indicator=table3_antenatal$indicator,
                                   group=table3_antenatal$group,
                                   trends_rel= table3_antenatal$trends_rel,
                                   trends_abs= table3_antenatal$trends_abs, 
                                   d_2014=table3_antenatal$d_2014, 
                                   d_2016=table3_antenatal$d_2016)
final_table3_antenatal$m_2014 <- sprintf("%.2f (%.2f)", table3_antenatal$m_2014, 
                                       table3_antenatal$se_2014)
final_table3_antenatal$m_2016 <- sprintf("%.2f (%.2f)", table3_antenatal$m_2016, 
                                       table3_antenatal$se_2016)
final_table3_antenatal$dif_2014 <- ifelse(is.na(table3_antenatal$`p-val_2014`),"--" ,sprintf("%.2f (%.2f)", table3_antenatal$dif_2014,table3_antenatal$`p-val_2014`))
final_table3_antenatal$dif_2016 <- ifelse(is.na(table3_antenatal$`p-val_2016`),"--" ,sprintf("%.2f (%.2f)", table3_antenatal$dif_2016,table3_antenatal$`p-val_2016`))

final_table3_antenatal$`dif.in.dif(unadj_unwt)` <- ifelse(is.na(table3_antenatal$`p-val_did_unadj_unwt`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                  table3_antenatal$`dif.in.dif(coef)_unadj_unwt`,
                                                                table3_antenatal$`p-val_did_unadj_unwt`))

final_table3_antenatal$`dif.in.dif(adj_unwt)` <- ifelse(is.na(table3_antenatal$`p-val_did_adj_unwt`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                  table3_antenatal$`dif.in.dif(coef)_adj_unwt`,
                                                                table3_antenatal$`p-val_did_adj_unwt`))

final_table3_antenatal$`dif.in.dif(mixed)` <- ifelse(is.na(table3_antenatal$`p-val_did_mixed`),"--" , 
                                                        sprintf("%.2f (%.2f)", 
                                                                table3_antenatal$`dif.in.dif(coef)_mixed`, 
                                                                table3_antenatal$`p-val_did_mixed`))


final_table3_antenatal <- as_tibble(final_table3_antenatal) %>% 
  dplyr::select(-c(indicator)) %>%
  mutate(
    group = case_when(group == 1 ~ "IG", 
                      group == 0 ~ "NG")
   ) %>%
  rename("orig_DiD" = "dif.in.dif(unadj_unwt)",
         "orig_DiD_adj" = "dif.in.dif(adj_unwt)",
         "mixed_effects_DiD" = "dif.in.dif(mixed)")

col_order <- c("group", "d_2014", "d_2016", "m_2014", "m_2016", "dif_2014", "dif_2016",
               "trends_rel", "trends_abs", "orig_DiD", "orig_DiD_adj", "mixed_effects_DiD")
final_table3_antenatal <- final_table3_antenatal[, col_order]
saveRDS(final_table3_antenatal, 
        "table_fig/antenatal/final_table3antenatal_mixed.rds")  

gt(final_table3_antenatal ) %>%
  tab_header(title = "Table S8: Summary of responses and trends re: antenatal consultations at a PHF among pregnant women from the 2014 and 2016 IHOPE surveys") %>%
  tab_style(style = cell_text(align = "left"), location = cells_title("title")) %>%
  tab_spanner(
    label = "2014",
    columns = c(d_2014, m_2014, dif_2014)
  ) %>%
  tab_spanner(
    label = "2016",
    columns = c(d_2016, m_2016, dif_2016)
  ) %>%
  tab_spanner(
    label = "2014-2016 Trend",
    columns = c(trends_rel, trends_abs, orig_DiD, orig_DiD_adj, mixed_effects_DiD)
  ) %>%
  tab_style(style = cell_text(weight = "bold"), 
            location = cells_column_spanners(spanners = everything())) %>%
    cols_label(group = "Study group", d_2014 = "N", m_2014 = "Percent (SE)", 
               dif_2014 = " Difference (p-value)", d_2016 = "N", 
               m_2016 = "Percent (SE)", dif_2016 = " Difference (p-value)", 
               trends_rel = "Percent relative change", 
               trends_abs = "Percent absolute change", orig_DiD="Original Unadjusted DiD (p value) ",
               orig_DiD_adj="Original Adjusted DiD (p value) ",
               mixed_effects_DiD="Mixed Effects DiD (p value)") %>%
    tab_style(style = cell_text(weight = "bold"), 
              location = cells_column_labels(columns = everything())) %>%
    gt::gtsave("table_fig/paper/Supplemental/table_s8_antenatal_mixed_effect.png")
```

## Figure 3 Antenatal
```{r}
num_var <- 1
fig_antenatal_mixed <- as.data.frame(matrix(nrow = num_var * 4, ncol = 6))

###  UPDATE TABLE NAME AS NEEDED ###
fig_antenatal_mixed$V1 <- round(c(
  table3_antenatal$m_2014, table3_antenatal$m_2016), 2)
fig_antenatal_mixed$V2 <- round(c(
  table3_antenatal$n_2014, table3_antenatal$n_2016), 0)
fig_antenatal_mixed$V3 <- round(c(
  table3_antenatal$d_2014, table3_antenatal$d_2016), 0)
fig_antenatal_mixed$V4 <- rep(c("Intervention", "Non-Intervention"), num_var * 2)
fig_antenatal_mixed$V5 <- rep(c("2014 survey", "2016 survey"), each = num_var * 2)
fig_antenatal_mixed$V6 <- rep("Antenatal Consultations at a PHF", num_var * 4) ### RENAME BASED ON OUTCOME ###
colnames(fig_antenatal_mixed) <- c("mean", "num", "dem", "group", "year", "indicator")
fig_antenatal_mixed$group <- factor(fig_antenatal_mixed$group, levels = c("Non-Intervention", "Intervention"))
fig_antenatal_mixed <- 
  ggplot(fig_antenatal_mixed, 
         aes(x = mean, y = group, group = interaction(group, indicator), 
             label = sprintf("%s/%s", num, dem))) +
  geom_point(aes(colour = year), size = 3.5) +
  scale_colour_manual(breaks = fig_antenatal_mixed$year, 
                      values = c("2014 survey" = "black", "2016 survey" = "red")) +
  geom_line() +
  labs(
    x = "Percentage", y = "",
    title = "Mixed Effects DiD with Village-Level Random Effects:\nAntenatal Consultations at a PHF" ### UPDATE TITLE BASED ON OUTCOME ###
  ) +
  theme_bw() +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10), expand = c(0, 0)) +
  scale_y_discrete(position = "right") +
  theme(
    legend.position = "bottom", axis.text = element_text(size = 10), 
    panel.grid.minor = element_blank(), legend.title = element_blank(),
    title = element_text(size = 12, face = "bold"), 
    axis.title = element_text(size = 13),
    legend.text = element_text(size = 12)
  )+ 
  annotate("text", label="{", x=50, y=1.5, size =20, fontface="plain")+
  annotate("text", label=paste0("Adjusted DiD (p-val): \n",final_table3_antenatal$mixed_effects_DiD[2]), x=35, y=1.5)

saveRDS(fig_antenatal_mixed,"table_fig/antenatal/fig_antenatal_mixed.rds")
ggsave("table_fig/antenatal/fig3antenatal_mixed.png")
```

## Antenatal Variation Figure

```{r}
# Empirical Bayes adjusted DiD estimates for each village
coefs_antenatal <- coef(did_antenatal_mixed)$cluster
coefs_antenatal$village <- rownames(coefs_antenatal)

# Identify villages with lowest and highest DiD estimates
village_min_antenatal <- which.min(coefs_antenatal$`year:group`)
village_max_antenatal <- which.max(coefs_antenatal$`year:group`)

# Plot for village with lowest DiD estimate
village_min_antenatal_table <- data.frame(village = rep(village_min_antenatal, 4),
                                time = c(2014, 2014, 2016, 2016),
                                intervention = c("Non-Intervention", 
                                                 "Intervention",
                                                 "Non-Intervention", 
                                                 "Intervention"),
                                outcome = c(coefs_antenatal$`(Intercept)`[village_min_antenatal],
                                            coefs_antenatal$`(Intercept)`[village_min_antenatal] + 
                                              coefs_antenatal$group[village_min_antenatal],
                                            coefs_antenatal$`(Intercept)`[village_min_antenatal] + 
                                              coefs_antenatal$year[village_min_antenatal],
                                            coefs_antenatal$`(Intercept)`[village_min_antenatal] + 
                                              coefs_antenatal$year[village_min_antenatal] +
                                              coefs_antenatal$group[village_min_antenatal] + 
                                              coefs_antenatal$`year:group`[village_min_antenatal]))
village_min_antenatal_plot <- 
  ggplot(village_min_antenatal_table, mapping = aes(x = time, y = outcome, color = factor(intervention))) + 
  geom_point() + geom_line() + 
  geom_segment(aes(x = 2014, 
                   y = coefs_antenatal$`(Intercept)`[village_min_antenatal] + 
                     coefs_antenatal$group[village_min_antenatal], 
                   xend = 2016, 
                   yend = 1), 
               color = "black", lty = 2) + 
  geom_segment(aes(x = 2016, 
                   y = 1, 
                   xend = 2016, 
                   yend = coefs_antenatal$`(Intercept)`[village_min_antenatal] + 
                     coefs_antenatal$year[village_min_antenatal] +
                     coefs_antenatal$group[village_min_antenatal] +
                     coefs_antenatal$`year:group`[village_min_antenatal]), 
               color = "firebrick", lty = 3) + 
  scale_color_manual(values = c("Intervention" = "cornflowerblue", 
                                   "Non-Intervention" = "forestgreen")) + 
  scale_x_continuous(breaks = c(2014, 2016)) + ylim(0.8,1) + 
  labs(x = "Year", y = "Proportion",
       title = "Pregnant Women With Any Antenatal\nConsultation at a PHF",
       subtitle = paste0("Village #", village_min_antenatal),
       color = "Study Group") + 
  theme_bw() + theme(legend.position = "bottom", panel.grid = element_blank(),
                     plot.subtitle = element_text(face = "bold")) + 
  annotate("text", label = "DiD\nEstimate",
           x = 2015.875, y = 0.97, size = 3, color = "firebrick")
saveRDS(village_min_antenatal_plot, 
        "table_fig/antenatal/village_min_antenatal_plot.rds") 

# Plot for village with highest DiD estimate
village_max_antenatal_table <- data.frame(village = rep(village_max_antenatal, 4),
                                time = c(2014, 2014, 2016, 2016),
                                intervention = c("Non-Intervention", 
                                                 "Intervention",
                                                 "Non-Intervention", 
                                                 "Intervention"),
                                outcome = c(coefs_antenatal$`(Intercept)`[village_max_antenatal],
                                            coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                                              coefs_antenatal$group[village_max_antenatal],
                                            coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                                              coefs_antenatal$year[village_max_antenatal],
                                            coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                                              coefs_antenatal$year[village_max_antenatal] +
                                              coefs_antenatal$group[village_max_antenatal] + 
                                              coefs_antenatal$`year:group`[village_max_antenatal]))
village_max_antenatal_plot <- 
  ggplot(village_max_antenatal_table, mapping = aes(x = time, y = outcome, color = factor(intervention))) + 
  geom_point() + geom_line() + 
  geom_segment(aes(x = 2014, 
                   y = coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                     coefs_antenatal$group[village_max_antenatal], 
                   xend = 2016, 
                   yend = coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                     coefs_antenatal$year[village_max_antenatal] +
                     coefs_antenatal$group[village_max_antenatal]), 
               color = "black", lty = 2) + 
  geom_segment(aes(x = 2016, 
                   y = coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                     coefs_antenatal$year[village_max_antenatal] +
                     coefs_antenatal$group[village_max_antenatal], 
                   xend = 2016, 
                   yend = coefs_antenatal$`(Intercept)`[village_max_antenatal] + 
                     coefs_antenatal$year[village_max_antenatal] +
                     coefs_antenatal$group[village_max_antenatal] +
                     coefs_antenatal$`year:group`[village_max_antenatal]), 
               color = "firebrick", lty = 3) + 
  scale_color_manual(values = c("Intervention" = "cornflowerblue", 
                                   "Non-Intervention" = "forestgreen")) + 
  scale_x_continuous(breaks = c(2014, 2016)) + 
  labs(x = "Year", y = "Proportion",
       title = "Pregnant Women With Any Antenatal\nConsultation at a PHF",
       subtitle = paste0("Village #", village_max_antenatal),
       color = "Study Group") + 
  theme_bw() + theme(legend.position = "bottom", panel.grid = element_blank(),
                     plot.subtitle = element_text(face = "bold")) + 
  annotate("text", label = "DiD\nEstimate",
           x = 2015.875, y = 0.6, size = 3, color = "firebrick")
saveRDS(village_max_antenatal_plot, 
        "table_fig/antenatal/village_max_antenatal_plot.rds") 

# Plot for village with mean DiD estimate
village_median_antenatal_table <- data.frame(village = rep("Mean", 4),
                                time = c(2014, 2014, 2016, 2016),
                                intervention = c("Non-Intervention", 
                                                 "Intervention",
                                                 "Non-Intervention", 
                                                 "Intervention"),
                                outcome = c(fixef(did_antenatal_mixed)[1],
                                            fixef(did_antenatal_mixed)[1] + 
                                              fixef(did_antenatal_mixed)[3],
                                            fixef(did_antenatal_mixed)[1] + 
                                              fixef(did_antenatal_mixed)[2],
                                            fixef(did_antenatal_mixed)[1] + 
                                              fixef(did_antenatal_mixed)[2] +
                                              fixef(did_antenatal_mixed)[3] + 
                                              fixef(did_antenatal_mixed)[5]))
village_median_antenatal_plot <-
  ggplot(village_median_antenatal_table, mapping = aes(x = time, y = outcome, color = factor(intervention))) + 
  geom_point() + geom_line() + 
  geom_segment(aes(x = 2014, 
                   y = fixef(did_antenatal_mixed)[1] + 
                     fixef(did_antenatal_mixed)[3], 
                   xend = 2016, 
                   yend = fixef(did_antenatal_mixed)[1] + 
                     fixef(did_antenatal_mixed)[2] +
                     fixef(did_antenatal_mixed)[3]), 
               color = "black", lty = 2) + 
  geom_segment(aes(x = 2016, 
                   y = fixef(did_antenatal_mixed)[1] + 
                     fixef(did_antenatal_mixed)[2] +
                     fixef(did_antenatal_mixed)[3], 
                   xend = 2016, 
                   yend = fixef(did_antenatal_mixed)[1] + 
                     fixef(did_antenatal_mixed)[2] +
                     fixef(did_antenatal_mixed)[3] +
                     fixef(did_antenatal_mixed)[5]), 
               color = "firebrick", lty = 3) + 
  scale_color_manual(values = c("Intervention" = "cornflowerblue", 
                                   "Non-Intervention" = "forestgreen")) + 
  scale_x_continuous(breaks = c(2014, 2016)) + 
  labs(x = "Year", y = "Proportion",
       title = "Pregnant Women With Any Antenatal\nConsultation at a PHF",
       subtitle = "Median Village",
       color = "Study Group") + 
  theme_bw() + theme(legend.position = "bottom", panel.grid = element_blank(),
                     plot.subtitle = element_text(face = "bold")) + 
  annotate("text", label = "DiD\nEstimate",
           x = 2015.875, y = 0.85, size = 3, color = "firebrick")
saveRDS(village_median_antenatal_plot, 
        "table_fig/antenatal/village_median_antenatal_plot.rds") 

# Save panel of plots
ggsave("table_fig/antenatal/antenatal_mixed_variation.png", 
       arrangeGrob(village_min_antenatal_plot, village_median_antenatal_plot, village_max_antenatal_plot, ncol = 3),
       width = 15, height = 7.5, units = "in")
```